/************************************************************************
Purpose: 	Calculating a basic potential max. pollution measure for firms

Notes:		Conservative estimates have been used to generate the measures.
			1. Flow is assumed to be constant across the chain. This is not strictly true: the max. pollution
			measures here assume no APCDs, in which case flow would be higher. Using the lower measured flow
			values therefore makes the measure more conservative.
			
			2. Assumption of outlet concentration is taken to be 2500 mg/Nm3 (this is a conservatively plausible estimate,
			high loads can be even higher)
			
			3. Operating hours are taken to be either 12 or 16 hours per day (to account for overall closures and low production periods)
*************************************************************************/

// --- Session setup ---------------------------------------------------------
// Start from an empty session so nothing left in memory leaks into this run.
clear all
clear matrix

// Do not pause output with --more--; allow -pause- statements to take effect.
set more off
pause on

// --- Load stack-level flow data ---------------------------------------------
// The first spreadsheet row supplies the variable names (firstrow).
// $EMISSIONS_DATA_IN is a global that must be defined before this do-file runs.
import excel using "$EMISSIONS_DATA_IN/master-data_flow-data_stack-level_surat-sample_20-04-2022_v3.xlsx", firstrow clear

// drop FlowRange AverageflowCalib Averageflow Efrac16 Efrac12 R AverageUncontrolledMassmonth

// --- Variable labels ---------------------------------------------------------
// Identifiers and firm/stack characteristics
label var composite_id "Composite ID"
label var industry_id "Industry ID"
label var gpcb_id "GPCB ID" 
label var treatmentstatus "Treatment Status" 
label var num_stacks "Number of Stacks"
label var total_heat_output "Total Heat Output (Boiler/TFH/HAG)"

// Flow measurements; the unit in each label follows the variable-name suffix
// (m3ps = m3/s, m3ph = m3/h).
label var flow_2020_m3ps "Flow in m3/s (March 2020)" 
label var flow_2022_m3ps "Flow in m3/s (March 2022)" 
label var flow_2020_m3ph "Flow in m3/h (March 2020)" 
// Fixed: this label was previously applied to flow_2020_m3ph a second time,
// overwriting its label and leaving flow_2022_m3ph unlabeled.
label var flow_2022_m3ph "Flow in m3/h (March 2022)" 
label var flow_etsbl_m3ph "Flow in m3/h (ETS Baseline)" 
// Fixed: the label said m3/s although the variable is an m3/h series.
label var flow_cemsbl_m3ph "Flow in m3/h (CEMS Baseline 2014)" 

// Observed PM mass
label var AveragePMMassmonth "Average PM Mass (kg)/month (Sample Period)"

// --- Row-wise flow summaries (all in m3/h) -----------------------------------
// Spread of the four available m3/h flow measurements for each observation.
// rowmax()/rowmin() ignore missing values, so the range is computed over
// whichever measurements are present.
egen FlowRangeMax = rowmax(flow_2020_m3ph flow_2022_m3ph flow_etsbl_m3ph flow_cemsbl_m3ph)
egen FlowRangeMin = rowmin(flow_2020_m3ph flow_2022_m3ph flow_etsbl_m3ph flow_cemsbl_m3ph)
gen FlowRangeR = FlowRangeMax - FlowRangeMin

// Row means use the documented rowmean() instead of its legacy alias rmean(),
// matching the rowmax()/rowmin() calls above. Missing values are ignored.
egen AverageFlowCalibr = rowmean(flow_2020_m3ph flow_2022_m3ph)
egen AverageFlowAll = rowmean(flow_2020_m3ph flow_2022_m3ph flow_etsbl_m3ph flow_cemsbl_m3ph)

label var FlowRangeMax "Max of All m3/h values"
label var FlowRangeMin "Min of All m3/h values"
label var AverageFlowAll "Average across All" 
label var AverageFlowCalibr "Average across Calibration only" 
label var FlowRangeR "Max - Min of All m3/h values"

// --- Potential ("uncontrolled") PM mass per month, in kg ---------------------
// mass = flow x outlet concentration x hours/day x days/month, which gives
// mg/month; dividing by 1000*1000 converts mg/month to kg/month.
// Assumptions:
//   1. Outlet conc. 2500 mg/Nm3 (conservative)
//   2. 12 hours or 16 hours of operation per day
//   3. 30-day period per month
local outlet_conc 2500
local month_days  30

foreach hrs of numlist 12 16 {
	gen AverageUncontrolledMass`hrs' = (AverageFlowAll*`outlet_conc'*`hrs'*`month_days')/(1000*1000)
	label var AverageUncontrolledMass`hrs' "Average Uncontrolled PM Mass (kg)/month (`hrs'h/day)"
}

// Observed monthly PM mass as a fraction of the uncontrolled potential, one
// variable per operating-hours scenario. Kept in a separate loop so the
// variables are created in the same order as before.
foreach hrs of numlist 12 16 {
	gen Efrac`hrs' = AveragePMMassmonth/AverageUncontrolledMass`hrs'
}

// --- Aggregate to one row per GPCB ID and save -------------------------------
// Flow averages and emission fractions are averaged within each gpcb_id;
// the mass variables are summed within each gpcb_id.
// NOTE(review): Efrac12/Efrac16 here are means of the row-level ratios, not
// sum(AveragePMMassmonth)/sum(AverageUncontrolledMass*) -- confirm which
// GPCB-level definition is intended.
// NOTE(review): collapse (sum) treats missing values as zero, so a gpcb_id with
// no non-missing mass values would get 0 rather than missing -- confirm this is
// acceptable.
collapse (mean) AverageFlowAll AverageFlowCalibr Efrac12 Efrac16 ///
	(sum) AveragePMMassmonth AverageUncontrolledMass12 AverageUncontrolledMass16, ///
	by(gpcb_id)

// collapse replaces variable labels with generic "(stat) varname" labels, so
// re-apply descriptive labels that also record how each variable was aggregated.
label var AverageFlowAll "Average across All (mean by GPCB ID)"
label var AverageFlowCalibr "Average across Calibration only (mean by GPCB ID)"
label var Efrac12 "PM Mass / Uncontrolled PM Mass, 12h/day (mean by GPCB ID)"
label var Efrac16 "PM Mass / Uncontrolled PM Mass, 16h/day (mean by GPCB ID)"
label var AveragePMMassmonth "Average PM Mass (kg)/month (Sample Period) (sum by GPCB ID)"
label var AverageUncontrolledMass12 "Average Uncontrolled PM Mass (kg)/month (12h/day) (sum by GPCB ID)"
label var AverageUncontrolledMass16 "Average Uncontrolled PM Mass (kg)/month (16h/day) (sum by GPCB ID)"

// $EMISSIONS_DATA_OUT is a global that must be defined before this do-file runs.
save "$EMISSIONS_DATA_OUT/potential_max_emissions", replace
